School Transition Data

First research question: How do the discrepancies between child-perceived parental warmth & hostility and parent-perceived parental warmth & hostility change from wave 1 to wave 2?

Since pupils go through secondary school transition from wave 1 to wave 2, our hypothesis is the discrepancies will significantly differ due to the stressful event of school transitioning.

Second research question: Do the discrepancies differ for mothers and fathers? In other words, do moms or dads tend to have higher discrepancy in their parental warmth and hostility? Do moms or dads’ discrepancies change more following school transitioning?

Third research question: How does parental warmth and hostility discrepancy at wave 1 predict pupil’s mental health difficulties at wave 2?

# Read the SPSS (.sav) export of the school-transition dataset.
library(haven)
schooltransitiondata <- read_sav(
  "/Users/rluo/Documents/Longitudinal Data Analysis/Team-6/Data/SchoolTransitionData.sav"
)

Remember that lower scores mean a higher extent—a lower warmth score means more warmth, and a lower hostility score means more hostility.

library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.4     ✔ readr     2.1.5
## ✔ forcats   1.0.0     ✔ stringr   1.5.1
## ✔ ggplot2   3.5.1     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.1
## ✔ purrr     1.0.2     
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
# Keep the wave-1 child-perceived PWH items (columns 76-97) plus ID,
# dropping pupils with no screener response and keeping only those whose
# screener indicates they lived with mum and/or dad (item == 0).
PWHdata <- schooltransitiondata %>%
  select(76:97, ID) %>%
  filter(!is.na(C1_PWHAa), C1_PWHAa == 0 | C1_PWHBa == 0)

All of the children are living with either their mom or dad or both during the past 6 months.

I first look at child-perceived parental warmth and hostility at wave 1

# Wave-1 child-perceived scale scores. Hostility = items 1, 3, 4, 8;
# warmth = items 2, 5, 6, 7, 9, 10 (A = maternal items, B = paternal).
MaternalPWHdata <- PWHdata %>%
  select(1:11, ID) %>%
  na.omit() %>%
  mutate(
    hostility = C1_PWHA1 + C1_PWHA3 + C1_PWHA4 + C1_PWHA8,
    warmth = C1_PWHA2 + C1_PWHA5 + C1_PWHA6 + C1_PWHA7 + C1_PWHA9 + C1_PWHA10
  )
PaternalPWHdata <- PWHdata %>%
  select(12:22, ID) %>%
  na.omit() %>%
  mutate(
    hostility = C1_PWHB1 + C1_PWHB3 + C1_PWHB4 + C1_PWHB8,
    # BUG FIX: the original summed C1_PWH7, a non-existent column; the
    # intended item is C1_PWHB7, matching the wave-2 code (C2_PWHB7).
    warmth = C1_PWHB2 + C1_PWHB5 + C1_PWHB6 + C1_PWHB7 + C1_PWHB9 + C1_PWHB10
  )
# Slim per-parent frames: just the two scale scores plus the pupil ID.
Maternal <- MaternalPWHdata %>%
  select(warmth, hostility, ID)
Paternal <- PaternalPWHdata %>%
  select(warmth, hostility, ID)

I filtered out all the rows with NA data

# Stack maternal and paternal rows and plot the wave-1 child-perceived
# distributions (histogram with a density overlay).
# FIX: `..density..` was deprecated in ggplot2 3.4.0 (this file runs
# 3.5.1, see the attach banner) — use after_stat(density) instead.
Overall <- rbind(Maternal, Paternal)
p1 <- ggplot(Overall, aes(x = warmth)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6") +
  ggtitle("Child-Perceived Parental Warmth at Wave 1")
p2 <- ggplot(Overall, aes(x = hostility)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Child-perceived Parental Hostility at Wave 1")
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## The following object is masked from 'package:dplyr':
## 
##     combine
p_density <- grid.arrange(p1, p2)
## Warning: The dot-dot notation (`..density..`) was deprecated in ggplot2 3.4.0.
## ℹ Please use `after_stat(density)` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.

Now I look at parent-perceived parental warmth and hostility at wave 1

# Wave-1 parent-reported PWH (items in columns 418-427), scored with the
# same scheme as the child report: hostility = 1,3,4,8; warmth = 2,5,6,7,9,10.
PWHdata_parents <- schooltransitiondata %>%
  select(418:427, ID) %>%
  na.omit()
Parent_PWHdata <- PWHdata_parents %>%
  mutate(hostility_P = P1_PWH1 + P1_PWH3 + P1_PWH4 + P1_PWH8) %>%
  mutate(warmth_P = P1_PWH2 + P1_PWH5 + P1_PWH6 + P1_PWH7 + P1_PWH9 + P1_PWH10)
# FIX: `..density..` deprecated since ggplot2 3.4.0 → after_stat(density).
p3 <- ggplot(Parent_PWHdata, aes(x = warmth_P)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#dfd") +
  ggtitle("Parent-Perceived Parental Warmth at Wave 1")
p4 <- ggplot(Parent_PWHdata, aes(x = hostility_P)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#F2D") +
  ggtitle("Parent-Perceived Parental Hostility at Wave 1")
p_density2 <- grid.arrange(p3, p4)

Now I look at discrepancies (children - parents)

First of all, I will merge the parent- and children-perceived PWH data together based on ID.

# Merge child-perceived rows (up to two per pupil: mum and dad) with the
# parent-report scores, then count rows per pupil.
Parent_PWHdata_overall <- Parent_PWHdata %>%
  select(ID, hostility_P, warmth_P)
# NOTE: the original re-created Parent_PWHdata here with the exact same
# mutate() calls as above; that redundant recomputation is removed.
Overall_P_and_C <- merge(Overall, Parent_PWHdata, by = "ID")
Overall_P_and_C %>%
  group_by(ID) %>%
  count()
## # A tibble: 607 × 2
## # Groups:   ID [607]
##    ID        n
##    <chr> <int>
##  1 01004     1
##  2 01008     2
##  3 01016     2
##  4 01018     2
##  5 01020     2
##  6 01022     1
##  7 01026     2
##  8 01032     2
##  9 01037     2
## 10 01039     2
## # ℹ 597 more rows

We need to know who wrote the parent-perceived parental warmth and hostility questionnaire (male =1, female =2)

# Rebuild the parent-report data keeping column 251 (P1_ParentGender:
# 1 = male, 2 = female) so we know which parent completed the form.
PWHdata_parents <- schooltransitiondata %>%
  select(418:427, 251, ID) %>%
  na.omit()
Parent_PWHdata <- PWHdata_parents %>%
  mutate(
    hostility_P = P1_PWH1 + P1_PWH3 + P1_PWH4 + P1_PWH8,
    warmth_P = P1_PWH2 + P1_PWH5 + P1_PWH6 + P1_PWH7 + P1_PWH9 + P1_PWH10
  )
Parent_PWHdata_overall <- Parent_PWHdata %>%
  select(ID, hostility_P, warmth_P, P1_ParentGender)
Overall_P_and_C <- merge(Overall, Parent_PWHdata_overall, by = "ID")

Now we need to separate maternal and paternal warmth and hostility

# IDs of pupils whose questionnaire was completed by their father
# (P1_ParentGender == 1). Stored as a list; downstream code unlist()s it.
PaternalPupils <- Overall_P_and_C %>%
  filter(P1_ParentGender == 1) %>%
  distinct(ID) %>%
  as.list()

These pupils had their dad filled out the questionnaire, so we should focus on paternal warmth and hostility for these pupils.

Let’s first check if all the above IDs are in PaternalPWH data IDs

PaternalPupils %in% PaternalPWHdata$ID
## [1] FALSE

So no, not all children with their dad answering the questionnaire had their paternal PWH data. So now we need to filter out those who had their paternal PWH data

# Pupils with BOTH a father-completed questionnaire and child-reported
# paternal PWH scores.
# FIX: the original assigned an intermediate to `list`, shadowing
# base::list(); plain vectors also make the as.list()/unlist() round-trip
# unnecessary — intersect() and %in% give identical results on vectors.
commonlist <- intersect(unlist(PaternalPupils), PaternalPWHdata$ID)
newdata_childperceived <- PaternalPWHdata %>%
  filter(ID %in% commonlist) %>%
  select(ID, hostility, warmth)
newdata_childperceived
## # A tibble: 78 × 3
##    ID    hostility warmth
##    <chr>     <dbl>  <dbl>
##  1 01008        24      6
##  2 01018        16     13
##  3 01022        20     22
##  4 01037        24      9
##  5 01053        25      8
##  6 01105        22     10
##  7 01118        16      6
##  8 01144        16      6
##  9 01156        28      7
## 10 01163        22     21
## # ℹ 68 more rows

This is the list of child-perceived parental warmth that can be compared with father-perceived parental warmth.

# Parent-reported scores where the father (gender code 1) completed the form.
Father_PWHdata_overall <- filter(Parent_PWHdata_overall, P1_ParentGender == 1)

This is the list of parent-perceived parental warmth (dad). Now we check if all IDs in the first list is exactly the same as the IDs in the second list

newdata_childperceived$ID %in% Father_PWHdata_overall$ID
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [76] TRUE TRUE TRUE

Yes, all the IDs in this new child-perceived dataframe is in father-perceived PWH dataframe

# Merge child- and father-reported wave-1 scores by ID, relabel columns by
# reporter (C1 = child, P1 = parent), and compute child-minus-parent
# discrepancy scores — all in one pipeline.
PaternalDataset1 <- merge(newdata_childperceived, Father_PWHdata_overall) %>%
  as_tibble() %>%
  rename(
    hostility_C1 = hostility,
    warmth_C1 = warmth,
    hostility_P1 = hostility_P,
    warmth_P1 = warmth_P
  ) %>%
  select(ID, hostility_C1, warmth_C1, hostility_P1, warmth_P1) %>%
  mutate(
    hostility_Discrepancies = hostility_C1 - hostility_P1,
    warmth_Discrepancies = warmth_C1 - warmth_P1
  )

Now we explore the distribution of the paternal discrepancies of warmth and hostility.

# Distributions of the wave-1 paternal discrepancy scores.
# FIX: `..density..` deprecated since ggplot2 3.4.0 → after_stat(density).
p5 <- ggplot(PaternalDataset1, aes(x = hostility_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6") +
  ggtitle("Paternal Hostility Discrepancy at Wave 1")
p6 <- ggplot(PaternalDataset1, aes(x = warmth_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Paternal Warmth Discrepancy at Wave 1")
p_density3 <- grid.arrange(p5, p6)

We shall pause and do some interpretations here: Lower number means more: for example, a lower number of warmth means that there is MORE warmth in the parental relationship; similarly, a high number of hostility means that there is LESS hostility in the parental relationship. So if the hostility discrepancy is a large positive number, it means that the child has a higher perceived hostility score, meaning that the child thinks that the parent is being LESS hostile than the parents think they are. On the other hand, if the hostility discrepancy is a large negative number, it means that the child has a lower perceived hostility score, meaning that the child thinks that the parent is being MORE hostile than the parents thinks they are. In similar senses, a large positive warmth discrepancy number means that the child has a higher perceived warmth score, meaning that the child thinks that the parent is being LESS warm than the parents think they are. A large negative warmth discrepancy number means that the child has a lower perceived warmth score, meaning that the child thinks that the parent is being MORE warm than the parents think they are.

We can observe from the distribution shape that hostility discrepancy is left skewed while warmth discrepancy is right skewed. And this means that parents generally tend to think they are more hostile and less warm than the children think they are. This could be a demonstration of parental guilt—parents sometime tend to over-reflect on their parental behaviors. Note that this is only the paternal data though. The next step is to see if similar patterns show on maternal data.

Now let’s look at the discrepancy for maternal data.

# Repeat the pairing for mothers (questionnaire completed by mum,
# P1_ParentGender == 2).
MaternalPupils <- Parent_PWHdata_overall %>%
  filter(P1_ParentGender == 2) %>%
  select(ID) %>%
  unique() %>%
  as.list()
# IDs present in both the mother-report and the child-reported maternal
# data. (Plain vectors; the original's as.list()/unlist() round-trips were
# unnecessary and produced identical filter results.)
commonlist1 <- intersect(unlist(MaternalPupils), MaternalPWHdata$ID)
newdata_childperceived_m <- MaternalPWHdata %>%
  filter(ID %in% commonlist1) %>%
  select(ID, hostility, warmth)
Mother_PWHdata_overall <- Parent_PWHdata_overall %>%
  filter(P1_ParentGender == 2)
# Sanity check: every retained child ID has mother-report data.
newdata_childperceived_m$ID %in% Mother_PWHdata_overall$ID
##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [256] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [271] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [286] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [316] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [331] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [346] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [361] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [376] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [391] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [406] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [421] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [436] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [451] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [466] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [481] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

We have much more maternal data than paternal data; more mothers filled out the parental warmth forms than the fathers.

# Merge child- and mother-reported wave-1 scores, relabel by reporter,
# and compute child-minus-parent discrepancies.
MaternalDataset1 <- merge(newdata_childperceived_m, Mother_PWHdata_overall) %>%
  as_tibble() %>%
  rename(
    hostility_C1 = hostility,
    warmth_C1 = warmth,
    hostility_P1 = hostility_P,
    warmth_P1 = warmth_P
  ) %>%
  select(ID, hostility_C1, warmth_C1, hostility_P1, warmth_P1) %>%
  mutate(
    hostility_Discrepancies = hostility_C1 - hostility_P1,
    warmth_Discrepancies = warmth_C1 - warmth_P1
  )
# FIX: `..density..` deprecated since ggplot2 3.4.0 → after_stat(density).
p7 <- ggplot(MaternalDataset1, aes(x = hostility_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#0000FF7D") +
  ggtitle("Maternal Hostility Discrepancy at Wave 1")
p8 <- ggplot(MaternalDataset1, aes(x = warmth_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#d2d") +
  ggtitle("Maternal Warmth Discrepancy at Wave 1")
p_density4 <- grid.arrange(p7, p8)

# All four wave-1 discrepancy panels side by side (paternal vs maternal).
p_overall_1 <- grid.arrange(p5, p7, p6, p8, nrow = 2, ncol = 2)

We can see that the patterns for paternal and maternal data are actually quite similar. I would say that the hostility discrepancy resembles more of a normal distribution and the warmth discrepancy data has a larger and more prominent peak. This could potentially mean that mothers are more accurate in terms of their self-reflected parental warmth and hostility (at least more consistent with children’s feedback). The degree of “parental guilt” as described before is less prominent among mothers than fathers.

Okay, now we can do the same thing for the Wave 2 data.

# Wave-2 child-perceived PWH (items in columns 553-574), same scoring as
# wave 1. NOTE(review): unlike wave 1, BOTH screener items are required
# non-NA here — confirm this asymmetry with wave 1 is intentional.
PWHdataWave2 <- schooltransitiondata %>%
  select(553:574, ID) %>%
  filter(!is.na(C2_PWHAa)) %>%
  filter(!is.na(C2_PWHBa)) %>%
  filter(C2_PWHAa == 0 | C2_PWHBa == 0) %>%
  na.omit()
MaternalPWHdataWave2 <- PWHdataWave2 %>%
  select(1:11, ID) %>%
  mutate(hostility = C2_PWHA1 + C2_PWHA3 + C2_PWHA4 + C2_PWHA8) %>%
  mutate(warmth = C2_PWHA2 + C2_PWHA5 + C2_PWHA6 + C2_PWHA7 + C2_PWHA9 + C2_PWHA10)
PaternalPWHdataWave2 <- PWHdataWave2 %>%
  select(12:22, ID) %>%
  mutate(hostility = C2_PWHB1 + C2_PWHB3 + C2_PWHB4 + C2_PWHB8) %>%
  mutate(warmth = C2_PWHB2 + C2_PWHB5 + C2_PWHB6 + C2_PWHB7 + C2_PWHB9 + C2_PWHB10)
MaternalWave2 <- MaternalPWHdataWave2 %>%
  select(warmth, hostility, ID)
PaternalWave2 <- PaternalPWHdataWave2 %>%
  select(warmth, hostility, ID)
OverallWave2 <- rbind(MaternalWave2, PaternalWave2)
# FIX: `..density..` deprecated since ggplot2 3.4.0 → after_stat(density).
p9 <- ggplot(OverallWave2, aes(x = warmth)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#d2d") +
  ggtitle("Child-Perceived Parental Warmth at Wave 2")
p10 <- ggplot(OverallWave2, aes(x = hostility)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#DFD") +
  ggtitle("Child-Perceived Parental Hostility at Wave 2")
grid.arrange(p9, p10)

# Child-perceived distributions: wave 1 vs wave 2 side by side.
grid.arrange(p1, p9, p2, p10, nrow = 2, ncol = 2)

From here we can see that the child-perceived parental warmth and hostility are actually very similar for wave 1 and wave 2. One difference worth noting is that for the wave 2 hostility data there is a huge peak at the very right, which means that perceived parental hostility actually goes down at wave 2. This is consistent with the warmth data: the far-left peak for warmth at wave 2 is higher than that at wave 1. Generally, children perceive their parents as warmer and less hostile toward them at wave 2.

# Wave-2 parent-reported PWH (columns 732-741) plus who completed it.
PWHdata_parents_Wave2 <- schooltransitiondata %>%
  select(732:741, ID, P2_ParentGender) %>%
  na.omit()
Parent_PWHdata_Wave2 <- PWHdata_parents_Wave2 %>%
  mutate(hostility_P2 = P2_PWH1 + P2_PWH3 + P2_PWH4 + P2_PWH8) %>%
  mutate(warmth_P2 = P2_PWH2 + P2_PWH5 + P2_PWH6 + P2_PWH7 + P2_PWH9 + P2_PWH10)
# FIX: `..density..` deprecated since ggplot2 3.4.0 → after_stat(density).
p11 <- ggplot(Parent_PWHdata_Wave2, aes(x = warmth_P2)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#0000FF7D") +
  ggtitle("Parent-Perceived Parental Warmth at Wave 2")
p12 <- ggplot(Parent_PWHdata_Wave2, aes(x = hostility_P2)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 1,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6") +
  ggtitle("Parent-Perceived Parental Hostility at Wave 2")
grid.arrange(p11, p12)

# Parent-perceived distributions: wave 1 vs wave 2.
grid.arrange(p3, p11, p4, p12)

# Everything together: child- and parent-perceived, both waves.
grid.arrange(p1, p9, p3, p11, p2, p10, p4, p12, nrow = 2, ncol = 4)

From this graph, we can see that the discrepancy between wave 2 hostility child-perceived and parent-perceived is the greatest. We shall know more about it once we conducted discrepancy analyses.

Now we can calculate the discrepancies at Wave 2.

Again, we need to know who wrote the questionnaire at wave 2 (mom or dad).

# IDs of pupils whose wave-2 questionnaire was completed by their father.
# Stored as a list; downstream code unlist()s it before use.
PaternalPupilsWave2 <- Parent_PWHdata_Wave2 %>%
  filter(P2_ParentGender == 1) %>%
  distinct(ID) %>%
  as.list()

This is the list of pupils that have their dad filled out the PWH data at wave 2.

PaternalPupilsWave2 %in% PaternalPWHdataWave2$ID
## [1] FALSE
# Pupils with both a father-completed wave-2 questionnaire and
# child-reported paternal scores. (Plain vectors instead of the original
# as.list()/unlist() round-trips; the filter result is identical.)
commonlist2 <- intersect(unlist(PaternalPupilsWave2), PaternalPWHdataWave2$ID)
newdata_childperceived_p_wave2 <- PaternalPWHdataWave2 %>%
  filter(ID %in% commonlist2) %>%
  select(ID, hostility, warmth)
Father_PWHdata_overall_wave2 <- Parent_PWHdata_Wave2 %>%
  filter(P2_ParentGender == 1) %>%
  select(ID, hostility_P2, warmth_P2)
# Sanity check: every retained child ID has father-report data.
newdata_childperceived_p_wave2$ID %in% Father_PWHdata_overall_wave2$ID
##  [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE

Now we combine the child-perceived and dad-perceived data together.

# Merge wave-2 child- and father-reported scores; compute discrepancies.
PaternalDataset2 <- merge(newdata_childperceived_p_wave2, Father_PWHdata_overall_wave2) %>%
  as_tibble() %>%
  rename(hostility_C2 = hostility,
         warmth_C2 = warmth) %>%
  mutate(hostility_Discrepancies = hostility_C2 - hostility_P2,
         warmth_Discrepancies = warmth_C2 - warmth_P2)
# FIX: `..density..` deprecated since ggplot2 3.4.0 → after_stat(density).
p13 <- ggplot(PaternalDataset2, aes(x = hostility_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6") +
  ggtitle("Paternal Hostility Discrepancy at Wave 2")
p14 <- ggplot(PaternalDataset2, aes(x = warmth_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Paternal Warmth Discrepancy at Wave 2")
grid.arrange(p13, p14)

If the hostility discrepancy is a large positive number, it means that the child has a higher perceived hostility score, meaning that the child thinks that the parent is being LESS hostile than the parents think they are. On the other hand, if the hostility discrepancy is a large negative number, it means that the child has a lower perceived hostility score, meaning that the child thinks that the parent is being MORE hostile than the parents thinks they are. In similar senses, a large positive warmth discrepancy number means that the child has a higher perceived warmth score, meaning that the child thinks that the parent is being LESS warm than the parents think they are. A large negative warmth discrepancy number means that the child has a lower perceived warmth score, meaning that the child thinks that the parent is being MORE warm than the parents think they are.

We can observe from the distribution shape that hostility discrepancy is left skewed while warmth discrepancy is right skewed. And this means that parents generally tend to think they are more hostile and less warm than the children think they are. This could be a demonstration of parental guilt—parents sometime tend to over-reflect on their parental behaviors. Note that this is only the paternal data though. The next step is to see if similar patterns show on maternal data. Basically the same patterns maintained at wave 2. We shall compare the distribution side by side to have a better comparison.

But we shall look at maternal data first.

# IDs of pupils whose wave-2 questionnaire was completed by their mother.
MaternalPupilsWave2 <- Parent_PWHdata_Wave2 %>%
  filter(P2_ParentGender == 2) %>%
  select(ID) %>%
  unique() %>%
  as.list()
# BUG FIX: `list %in% vector` always returns a single FALSE; unlist first
# so the membership test is elementwise over the IDs.
all(unlist(MaternalPupilsWave2) %in% MaternalPWHdataWave2$ID)
## [1] FALSE
# Pupils with both a mother-completed wave-2 questionnaire and
# child-reported maternal scores. (Plain vectors instead of the original
# as.list()/unlist() round-trips; the filter result is identical.)
commonlist3 <- intersect(unlist(MaternalPupilsWave2), MaternalPWHdataWave2$ID)
newdata_childperceived_m_wave2 <- MaternalPWHdataWave2 %>%
  filter(ID %in% commonlist3) %>%
  select(ID, hostility, warmth)
Mother_PWHdata_overall_wave2 <- Parent_PWHdata_Wave2 %>%
  filter(P2_ParentGender == 2) %>%
  select(ID, hostility_P2, warmth_P2)
# Sanity check: every retained child ID has mother-report data.
newdata_childperceived_m_wave2$ID %in% Mother_PWHdata_overall_wave2$ID
##   [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [16] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [31] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [46] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [61] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [76] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
##  [91] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [106] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [121] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [136] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [151] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [166] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [181] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [196] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [211] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [226] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [241] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [256] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [271] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [286] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [301] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
## [316] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
# Merge wave-2 child- and mother-reported scores; compute discrepancies.
MaternalDataset2 <- merge(newdata_childperceived_m_wave2, Mother_PWHdata_overall_wave2) %>%
  as_tibble() %>%
  rename(hostility_C2 = hostility,
         warmth_C2 = warmth) %>%
  mutate(hostility_Discrepancies = hostility_C2 - hostility_P2,
         warmth_Discrepancies = warmth_C2 - warmth_P2)
# FIX: `..density..` deprecated since ggplot2 3.4.0 → after_stat(density).
p15 <- ggplot(MaternalDataset2, aes(x = hostility_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#0000FF7D") +
  ggtitle("Maternal Hostility Discrepancy at Wave 2")
p16 <- ggplot(MaternalDataset2, aes(x = warmth_Discrepancies)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#d2d") +
  ggtitle("Maternal Warmth Discrepancy at Wave 2")
grid.arrange(p15, p16)

Again, maternal discrepancies distributions are less skewed than that of paternal ones. There is an apparent outlier in the warmth discrepancy data though which is worth noting. Now we can put wave 2 discrepancy data together.

# Wave-2 discrepancy panels side by side: paternal vs maternal.
grid.arrange(p13, p15, p14, p16, nrow = 2, ncol = 2)

# All eight discrepancy panels: both waves, both parents.
grid.arrange(p5, p13, p7, p15, p6, p14, p8, p16, nrow = 2, ncol = 4)

Now I need to create a new combined data set in order to proceed with data analysis.

Let’s look at the paternal dataset first to see how many people dropped out after first wave.

PaternalDataset2$ID %in% PaternalDataset1$ID
##  [1]  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE
## [13] FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
## [25]  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
## [37]  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE
## [49] FALSE FALSE  TRUE FALSE  TRUE FALSE
# Tag each paternal wave with the reporter's gender and wave label, then
# stack the two waves into long format; count() shows per-pupil coverage.
PaternalDataset1 <- PaternalDataset1 %>%
  mutate(ParentGender = "Male", WaveNumber = "Wave1")
PaternalDataset2 <- PaternalDataset2 %>%
  mutate(ParentGender = "Male", WaveNumber = "Wave2")
PaternalDataOverall <- bind_rows(PaternalDataset1, PaternalDataset2)
PaternalDataOverall %>%
  group_by(ID) %>%
  count()
## # A tibble: 109 × 2
## # Groups:   ID [109]
##    ID        n
##    <chr> <int>
##  1 01008     1
##  2 01018     1
##  3 01022     1
##  4 01037     2
##  5 01053     1
##  6 01093     1
##  7 01105     1
##  8 01118     1
##  9 01139     1
## 10 01140     1
## # ℹ 99 more rows

And then let’s look at maternal dataset

MaternalDataset2$ID %in% MaternalDataset1$ID
##   [1]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE
##  [13]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE
##  [25]  TRUE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE
##  [37] FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
##  [49]  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE
##  [61] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [73]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE FALSE
##  [85]  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
##  [97]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE
## [109]  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [121]  TRUE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [133] FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [145]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [157]  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE
## [169]  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [181]  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE
## [193]  TRUE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE
## [205] FALSE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE
## [217]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE
## [229]  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [241] FALSE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE
## [253] FALSE  TRUE FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
## [265] FALSE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE FALSE  TRUE  TRUE  TRUE
## [277] FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE
## [289]  TRUE  TRUE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE FALSE  TRUE  TRUE
## [301]  TRUE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
## [313]  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE
## [325]  TRUE  TRUE  TRUE FALSE
# Same long-format stacking for the maternal data.
MaternalDataset1 <- MaternalDataset1 %>%
  mutate(ParentGender = "Female", WaveNumber = "Wave1")
MaternalDataset2 <- MaternalDataset2 %>%
  mutate(ParentGender = "Female", WaveNumber = "Wave2")
MaternalDataOverall <- bind_rows(MaternalDataset1, MaternalDataset2)
MaternalDataOverall %>%
  group_by(ID) %>%
  count()
## # A tibble: 586 × 2
## # Groups:   ID [586]
##    ID        n
##    <chr> <int>
##  1 01004     1
##  2 01016     2
##  3 01020     2
##  4 01024     1
##  5 01026     1
##  6 01032     1
##  7 01039     2
##  8 01040     2
##  9 01041     1
## 10 01043     2
## # ℹ 576 more rows

Now we combine the maternal AND paternal data into a big dataframe

# One long dataframe: both parents, both waves.
OverallData <- as_tibble(bind_rows(PaternalDataOverall, MaternalDataOverall))

Now I feel like it would make more sense to treat the discrepancy as absolute value. If I have more time, I think it would be even better to treat the positive value and negative value of discrepancy separately.

# Work with absolute discrepancies: the magnitude of child-parent
# disagreement, ignoring its direction.
OverallData <- OverallData %>%
  mutate(across(c(hostility_Discrepancies, warmth_Discrepancies),
                abs, .names = "Abs_{.col}"))

Multilevel modeling. What I am doing is called a mixed-model ANOVA. This model is specifically suited to designs with two dichotomous independent variables, which is the case here.

library(lme4)
## Loading required package: Matrix
## 
## Attaching package: 'Matrix'
## The following objects are masked from 'package:tidyr':
## 
##     expand, pack, unpack
# Keep only pupils observed at both waves (exactly 2 rows per ID), then
# code the two dichotomous predictors numerically for the mixed-model
# ANOVA: Male/Wave1 -> 1, Female/Wave2 -> 2.
DataLoss <- OverallData %>%
  group_by(ID) %>%
  count()
DataLoss <- DataLoss %>%
  filter(n == 2)
OverallDataComplete <- OverallData %>%
  filter(ID %in% DataLoss$ID) %>%
  mutate(ParentGender = factor(ParentGender, levels = c("Male", "Female")),
         WaveNumber = factor(WaveNumber, levels = c("Wave1", "Wave2"))) %>%
  mutate(WaveNumber = as.numeric(WaveNumber),
         ParentGender = as.numeric(ParentGender))

This generates a new dataset that include all individuals that have completed surveys in both wave 1 and wave 2

Null model

# Unconditional (intercept-only) model for hostility discrepancy:
# partitions variance into between-person (ID) and within-person parts.
# Fit with ML (REML = FALSE) so later likelihood-ratio comparisons are valid.
null_model1 <- lmer(Abs_hostility_Discrepancies ~ 1 + (1|ID), data = OverallDataComplete, REML = FALSE)
summary(null_model1)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: Abs_hostility_Discrepancies ~ 1 + (1 | ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   2682.2   2695.0  -1338.1   2676.2      533 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.6384 -0.6621 -0.1654  0.4069  4.6115 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 1.629    1.276   
##  Residual             7.151    2.674   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   3.4160     0.1394   24.51
library(performance)
# Intraclass correlation: share of total variance lying between persons.
performance::icc(null_model1)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.186
##   Unadjusted ICC: 0.186

ICC is interpreted as the proportion of variance between people. 18.6% of the variance in hostility discrepancy is attributed to a person.

Adding level-1 and Level-2 fixed and random effects

# Fixed effects of wave and parent gender, plus a reduced model without
# parent gender so the two can be compared by likelihood ratio below.
l1_model <- lmer(Abs_hostility_Discrepancies ~ 1 + WaveNumber + ParentGender + (1|ID), data = OverallDataComplete, REML = FALSE)
l1_model_wo_ParentGender <- lmer(Abs_hostility_Discrepancies ~ 1 + WaveNumber + (1|ID), data = OverallDataComplete, REML = FALSE)
summary(l1_model)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: Abs_hostility_Discrepancies ~ 1 + WaveNumber + ParentGender +  
##     (1 | ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   2658.3   2679.7  -1324.2   2648.3      531 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9668 -0.6121 -0.1437  0.4855  4.1092 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 1.988    1.410   
##  Residual             6.441    2.538   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##              Estimate Std. Error t value
## (Intercept)    3.1322     0.8691   3.604
## WaveNumber     1.1259     0.2192   5.135
## ParentGender  -0.7449     0.4213  -1.768
## 
## Correlation of Fixed Effects:
##             (Intr) WvNmbr
## WaveNumber  -0.372       
## ParentGendr -0.912 -0.007
# Summary of the reduced (wave-only) model.
summary(l1_model_wo_ParentGender)
## Linear mixed model fit by maximum likelihood  ['lmerMod']
## Formula: Abs_hostility_Discrepancies ~ 1 + WaveNumber + (1 | ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   2659.4   2676.6  -1325.7   2651.4      532 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.7895 -0.6234 -0.1498  0.4871  4.2872 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept) 1.945    1.394   
##  Residual             6.520    2.554   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error t value
## (Intercept)   1.7313     0.3590   4.822
## WaveNumber    1.1231     0.2206   5.092
## 
## Correlation of Fixed Effects:
##            (Intr)
## WaveNumber -0.922
# Likelihood-ratio test: does adding ParentGender improve fit?
anova(l1_model, l1_model_wo_ParentGender)
## Data: OverallDataComplete
## Models:
## l1_model_wo_ParentGender: Abs_hostility_Discrepancies ~ 1 + WaveNumber + (1 | ID)
## l1_model: Abs_hostility_Discrepancies ~ 1 + WaveNumber + ParentGender + (1 | ID)
##                          npar    AIC    BIC  logLik deviance  Chisq Df
## l1_model_wo_ParentGender    4 2659.4 2676.6 -1325.7   2651.4          
## l1_model                    5 2658.3 2679.8 -1324.2   2648.3 3.1058  1
##                          Pr(>Chisq)  
## l1_model_wo_ParentGender             
## l1_model                    0.07802 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test: full fixed-effects model vs. the null model.
anova(null_model1, l1_model)
## Data: OverallDataComplete
## Models:
## null_model1: Abs_hostility_Discrepancies ~ 1 + (1 | ID)
## l1_model: Abs_hostility_Discrepancies ~ 1 + WaveNumber + ParentGender + (1 | ID)
##             npar    AIC    BIC  logLik deviance  Chisq Df Pr(>Chisq)    
## null_model1    3 2682.2 2695.0 -1338.1   2676.2                         
## l1_model       5 2658.3 2679.8 -1324.2   2648.3 27.851  2  8.959e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Likelihood-ratio test: wave-only model vs. the null model.
anova(null_model1, l1_model_wo_ParentGender)
## Data: OverallDataComplete
## Models:
## null_model1: Abs_hostility_Discrepancies ~ 1 + (1 | ID)
## l1_model_wo_ParentGender: Abs_hostility_Discrepancies ~ 1 + WaveNumber + (1 | ID)
##                          npar    AIC    BIC  logLik deviance  Chisq Df
## null_model1                 3 2682.2 2695.0 -1338.1   2676.2          
## l1_model_wo_ParentGender    4 2659.4 2676.6 -1325.7   2651.4 24.745  1
##                          Pr(>Chisq)    
## null_model1                            
## l1_model_wo_ParentGender  6.543e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

The intercept is 3.13, the expected hostility discrepancy when both WaveNumber and ParentGender equal 0. Since neither a wave 0 nor a gender code of 0 exists, this value is not directly interpretable. Wave 2 has a 1.13 higher hostility discrepancy than wave 1, and dads have a 0.74 higher hostility discrepancy than moms on average.

Note that adding parent gender or not into the predictor did not significantly influence the deviance of the model, meaning that parent gender is likely a very weak predictor.

The model has significantly less deviance, so it is a better model.

library(lmerTest)
## 
## Attaching package: 'lmerTest'
## The following object is masked from 'package:lme4':
## 
##     lmer
## The following object is masked from 'package:stats':
## 
##     step
# Refit after loading lmerTest so the summary gains Satterthwaite degrees
# of freedom and p-values for the fixed effects; the model is unchanged.
coef(summary(l1_model <- lmer(Abs_hostility_Discrepancies ~ 1 + WaveNumber + ParentGender + (1|ID), data = OverallDataComplete, REML = FALSE)))
##                Estimate Std. Error       df   t value     Pr(>|t|)
## (Intercept)   3.1321698  0.8690989 470.9211  3.603928 3.468333e-04
## WaveNumber    1.1259137  0.2192493 266.8901  5.135312 5.438271e-07
## ParentGender -0.7448839  0.4213050 397.8651 -1.768040 7.782062e-02

Adding random slope. Note that I did not add parent gender into this model since it is a insignificant predictor.

# Add a random slope for wave alongside the random intercept.
l1_random <- lmer(Abs_hostility_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber|ID) + (1|ID), data = OverallDataComplete, REML = FALSE)
## boundary (singular) fit: see help('isSingular')
summary(l1_random)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_hostility_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber |  
##     ID) + (1 | ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   2635.6   2657.0  -1312.8   2625.6      531 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9585 -0.6189 -0.1437  0.4436  3.1424 
## 
## Random effects:
##  Groups   Name        Variance  Std.Dev. 
##  ID       WaveNumber  1.266e+00 1.125e+00
##  ID.1     (Intercept) 8.313e-10 2.883e-05
##  Residual             5.299e+00 2.302e+00
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)   1.7313     0.3144 268.0000   5.506 8.58e-08 ***
## WaveNumber    1.1231     0.2104 391.3807   5.338 1.60e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## WaveNumber -0.897
## optimizer (nloptwrap) convergence code: 0 (OK)
## boundary (singular) fit: see help('isSingular')

Singularity problem: 1|ID is very close to zero, meaning that the individual differences are very low. This indicates that the same individual tends to have very similar discrepancies between wave 1 and wave 2. So I removed the 1|ID element that explains the random effects of individuals.

# Random-slope-only model: drop the (1|ID) intercept term, whose variance
# was estimated at essentially zero (the singular fit above).
l1_random <- lmer(Abs_hostility_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber|ID), data = OverallDataComplete, REML = FALSE)
summary(l1_random)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_hostility_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber |  
##     ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   2633.6   2650.7  -1312.8   2625.6      532 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9585 -0.6189 -0.1437  0.4436  3.1424 
## 
## Random effects:
##  Groups   Name       Variance Std.Dev.
##  ID       WaveNumber 1.266    1.125   
##  Residual            5.299    2.302   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)   1.7313     0.3144 268.0000   5.506 8.58e-08 ***
## WaveNumber    1.1231     0.2104 391.3807   5.338 1.60e-07 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## WaveNumber -0.897
# Compare random-intercept vs. random-slope models; both have the same
# parameter count (0-df chi-square), so judge by AIC/deviance instead.
anova(l1_model_wo_ParentGender, l1_random)
## Data: OverallDataComplete
## Models:
## l1_model_wo_ParentGender: Abs_hostility_Discrepancies ~ 1 + WaveNumber + (1 | ID)
## l1_random: Abs_hostility_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber | ID)
##                          npar    AIC    BIC  logLik deviance  Chisq Df
## l1_model_wo_ParentGender    4 2659.4 2676.6 -1325.7   2651.4          
## l1_random                   4 2633.6 2650.7 -1312.8   2625.6 25.854  0
##                          Pr(>Chisq)
## l1_model_wo_ParentGender           
## l1_random

Adding the random slope decreases the deviance (and AIC), so it is the better-fitting model. Note that both models have the same number of parameters, which is why the chi-square test above has 0 degrees of freedom — the comparison rests on AIC/deviance rather than a likelihood-ratio test.

Adding interaction terms.

# Cross-level interaction: does the wave effect differ by parent gender?
crosslevel_model <- lmer(Abs_hostility_Discrepancies ~ 1 + WaveNumber + ParentGender + (0 + WaveNumber|ID) + WaveNumber:ParentGender, data = OverallDataComplete, REML = FALSE)
summary(crosslevel_model)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_hostility_Discrepancies ~ 1 + WaveNumber + ParentGender +  
##     (0 + WaveNumber | ID) + WaveNumber:ParentGender
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   2631.9   2657.6  -1310.0   2619.9      530 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -1.9515 -0.6080 -0.1255  0.4794  3.0172 
## 
## Random effects:
##  Groups   Name       Variance Std.Dev.
##  ID       WaveNumber 1.297    1.139   
##  Residual            5.176    2.275   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##                         Estimate Std. Error       df t value Pr(>|t|)   
## (Intercept)               5.0893     1.9131 294.5625   2.660  0.00823 **
## WaveNumber               -0.1116     1.2990 428.2865  -0.086  0.93159   
## ParentGender             -1.7830     1.0013 295.2214  -1.781  0.07599 . 
## WaveNumber:ParentGender   0.6562     0.6795 428.6173   0.966  0.33478   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WvNmbr PrntGn
## WaveNumber  -0.911              
## ParentGendr -0.987  0.899       
## WvNmbr:PrnG  0.900 -0.987 -0.911

Note that after adding the interaction term, the main effects of wave number and parent gender became insignificant. I will explain it with visualization later on. The Interaction is insignificant.

Let’s now look at parental warmths.

Null Model

# Unconditional (intercept-only) model for the warmth discrepancy outcome.
null_model2 <- lmer(Abs_warmth_Discrepancies ~ 1 + (1|ID), data = OverallDataComplete, REML = FALSE)
summary(null_model2)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_warmth_Discrepancies ~ 1 + (1 | ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   3140.2   3153.1  -1567.1   3134.2      533 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8421 -0.5452 -0.2119  0.3216  6.6017 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept)  6.882   2.623   
##  Residual             14.532   3.812   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)   4.0466     0.2298 268.0000   17.61   <2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# ICC for the warmth null model. Bug fix: this previously called
# icc(null_model1) — the hostility model — which is why the printed ICC
# repeated 0.186 exactly; it must use null_model2 fitted just above.
performance::icc(null_model2)
## # Intraclass Correlation Coefficient
## 
##     Adjusted ICC: 0.186
##   Unadjusted ICC: 0.186

ICC is interpreted as the proportion of variance between people — here, the share of the variance in warmth discrepancy attributable to the person. (Note: the ICC call above should use null_model2, the warmth model; as written it repeats the hostility model's value of 18.6%.)

Adding level 1 and level 2 fixed and random effects

# Fixed effects of wave and parent gender for warmth discrepancy.
l1_model1 <- lmer(Abs_warmth_Discrepancies ~ 1 + WaveNumber + ParentGender + (1|ID), data = OverallDataComplete, REML = FALSE)
summary(l1_model1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_warmth_Discrepancies ~ 1 + WaveNumber + ParentGender + (1 |  
##     ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   3134.6   3156.0  -1562.3   3124.6      531 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.7971 -0.5354 -0.2028  0.2645  6.5445 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept)  7.028   2.651   
##  Residual             14.092   3.754   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##              Estimate Std. Error       df t value Pr(>|t|)   
## (Intercept)    4.2444     1.3809 480.7884   3.074  0.00223 **
## WaveNumber     0.9323     0.3243 268.0083   2.875  0.00437 **
## ParentGender  -0.8462     0.6763 424.2445  -1.251  0.21155   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WvNmbr
## WaveNumber  -0.345       
## ParentGendr -0.921 -0.008

Parent gender's effect is less significant here than it was in the hostility-discrepancy model. Now let's remove parent gender and compare the models.

# Reduced warmth model without parent gender, for model comparison.
l1_model1_wo_ParentGender <- lmer(Abs_warmth_Discrepancies ~ 1 + WaveNumber + (1|ID), data = OverallDataComplete, REML = FALSE)
summary(l1_model1_wo_ParentGender)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_warmth_Discrepancies ~ 1 + WaveNumber + (1 | ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   3134.1   3151.3  -1563.1   3126.1      532 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.8182 -0.5282 -0.2147  0.2662  6.5216 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       (Intercept)  7.098   2.664   
##  Residual             14.100   3.755   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)   2.6530     0.5381 381.8551   4.930 1.23e-06 ***
## WaveNumber    0.9291     0.3244 268.0000   2.864  0.00451 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## WaveNumber -0.904
# Likelihood-ratio test: does ParentGender improve the warmth model?
anova(l1_model1, l1_model1_wo_ParentGender)
## Data: OverallDataComplete
## Models:
## l1_model1_wo_ParentGender: Abs_warmth_Discrepancies ~ 1 + WaveNumber + (1 | ID)
## l1_model1: Abs_warmth_Discrepancies ~ 1 + WaveNumber + ParentGender + (1 | ID)
##                           npar    AIC    BIC  logLik deviance  Chisq Df
## l1_model1_wo_ParentGender    4 3134.1 3151.3 -1563.1   3126.1          
## l1_model1                    5 3134.6 3156.0 -1562.3   3124.6 1.5617  1
##                           Pr(>Chisq)
## l1_model1_wo_ParentGender           
## l1_model1                     0.2114

Again, parent gender is a weak predictor. Now we add random slopes.

# Warmth model with both a random slope for wave and a random intercept.
l1_random1 <- lmer(Abs_warmth_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber|ID) + (1|ID), data = OverallDataComplete, REML = FALSE)
summary(l1_random1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_warmth_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber |  
##     ID) + (1 | ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   3119.6   3141.0  -1554.8   3109.6      531 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9841 -0.5003 -0.2131  0.2487  5.0749 
## 
## Random effects:
##  Groups   Name        Variance Std.Dev.
##  ID       WaveNumber   3.2584  1.8051  
##  ID.1     (Intercept)  0.5814  0.7625  
##  Residual             12.4708  3.5314  
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)   2.6530     0.4846 268.0004   5.475 1.01e-07 ***
## WaveNumber    0.9291     0.3244 268.0236   2.864  0.00451 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## WaveNumber -0.888
# Random-slope-only version (drops the small random intercept), mirroring
# the specification chosen for the hostility outcome.
l1_random1 <- lmer(Abs_warmth_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber|ID), data = OverallDataComplete, REML = FALSE)
summary(l1_random1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_warmth_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber |      ID)
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   3117.7   3134.8  -1554.8   3109.7      532 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9441 -0.4943 -0.2125  0.2565  5.0695 
## 
## Random effects:
##  Groups   Name       Variance Std.Dev.
##  ID       WaveNumber  3.444   1.856   
##  Residual            12.587   3.548   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##             Estimate Std. Error       df t value Pr(>|t|)    
## (Intercept)   2.6530     0.4846 268.0000   5.475 1.01e-07 ***
## WaveNumber    0.9291     0.3268 399.9072   2.843   0.0047 ** 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##            (Intr)
## WaveNumber -0.890
# Compare random-slope vs. random-intercept warmth models; equal parameter
# counts give a 0-df chi-square, so judge by AIC/deviance instead.
anova(l1_random1, l1_model1_wo_ParentGender)
## Data: OverallDataComplete
## Models:
## l1_random1: Abs_warmth_Discrepancies ~ 1 + WaveNumber + (0 + WaveNumber | ID)
## l1_model1_wo_ParentGender: Abs_warmth_Discrepancies ~ 1 + WaveNumber + (1 | ID)
##                           npar    AIC    BIC  logLik deviance Chisq Df
## l1_random1                   4 3117.7 3134.8 -1554.8   3109.7         
## l1_model1_wo_ParentGender    4 3134.1 3151.3 -1563.1   3126.1     0  0
##                           Pr(>Chisq)
## l1_random1                          
## l1_model1_wo_ParentGender

The deviance is way lower for the model that include the random slope. Now let’s include interaction.

# Cross-level interaction model for warmth discrepancy.
crosslevel_model1 <- lmer(Abs_warmth_Discrepancies ~ 1 + WaveNumber + ParentGender + (0 + WaveNumber|ID) + WaveNumber:ParentGender, data = OverallDataComplete, REML = FALSE)
summary(crosslevel_model1)
## Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's
##   method [lmerModLmerTest]
## Formula: Abs_warmth_Discrepancies ~ 1 + WaveNumber + ParentGender + (0 +  
##     WaveNumber | ID) + WaveNumber:ParentGender
##    Data: OverallDataComplete
## 
##      AIC      BIC   logLik deviance df.resid 
##   3119.4   3145.1  -1553.7   3107.4      530 
## 
## Scaled residuals: 
##     Min      1Q  Median      3Q     Max 
## -2.9250 -0.5133 -0.2103  0.2674  5.0197 
## 
## Random effects:
##  Groups   Name       Variance Std.Dev.
##  ID       WaveNumber  3.432   1.853   
##  Residual            12.530   3.540   
## Number of obs: 536, groups:  ID, 268
## 
## Fixed effects:
##                         Estimate Std. Error       df t value Pr(>|t|)  
## (Intercept)               5.4789     2.9802 294.6098   1.838    0.067 .
## WaveNumber                0.1532     2.0340 430.9499   0.075    0.940  
## ParentGender             -1.5010     1.5599 295.2381  -0.962    0.337  
## WaveNumber:ParentGender   0.4131     1.0641 431.1766   0.388    0.698  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Correlation of Fixed Effects:
##             (Intr) WvNmbr PrntGn
## WaveNumber  -0.908              
## ParentGendr -0.987  0.896       
## WvNmbr:PrnG  0.897 -0.987 -0.909

Similarly, after adding the interaction term, the main effects of wave number and parent gender became insignificant.

In summary, from wave 1 to wave 2, the hostility discrepancy increased by 1.13. Although not significant (a trend toward significance), moms tend to have 0.74 less discrepancy than dads. From wave 1 to wave 2, the warmth discrepancy increased by 0.93. Although not significant, moms have 0.85 less discrepancy than dads. Hostility discrepancy seems to show a stronger parent-gender effect than warmth discrepancy. This could indicate that stressful events such as transitioning to secondary school can increase the discrepancy between parent-perceived and child-perceived parental warmth and hostility.

I am now trying to visualize the data.

# Mean hostility discrepancy by wave and parent gender.
# Fixes: summarise() (one row per wave x gender cell) replaces mutate(),
# which kept all 536 rows and drew many overlapping identical line
# segments; and because WaveNumber is numeric (coded 1/2), the axis uses
# scale_x_continuous(breaks = ...) — the original scale_x_discrete with
# continuous limits triggered the warning shown in the rendered output.
p17 <- OverallDataComplete %>%
  group_by(WaveNumber, ParentGender) %>%
  summarise(hmean = mean(Abs_hostility_Discrepancies), .groups = "drop") %>%
  ggplot(mapping = aes(x = WaveNumber, y = hmean, colour = factor(ParentGender))) +
  geom_line() +
  labs(title = "Hostility Discrepancies Over Time", y = "Mean Hostility Discrepancies") +
  scale_x_continuous(breaks = c(1, 2))

We can see here that major difference between wave 1 and wave 2 is contributed by mom. That is exactly why after adding interaction terms the main effects of wave number and parent gender became insignificant.

# Mean warmth discrepancy by wave and parent gender.
# Same fixes as the hostility plot: summarise() to one row per cell
# instead of mutate() over every row, and a continuous x scale with
# breaks at the two waves instead of scale_x_discrete (which warned
# about continuous limits).
p18 <- OverallDataComplete %>%
  group_by(WaveNumber, ParentGender) %>%
  summarise(wmean = mean(Abs_warmth_Discrepancies), .groups = "drop") %>%
  ggplot(mapping = aes(x = WaveNumber, y = wmean, colour = factor(ParentGender))) +
  geom_line() +
  labs(title = "Warmth Discrepancies Over Time", y = "Mean Warmth Discrepancies") +
  scale_x_continuous(breaks = c(1, 2))

We can see here that major difference between wave 1 and wave 2 is again contributed by mom. That is exactly why after adding interaction terms the main effects of wave number and parent gender became insignificant.

# Arrange the two trend plots in one figure. Namespaced call so it does
# not depend on gridExtra having been attached earlier in the document.
gridExtra::grid.arrange(p17, p18)

In summary, we can conclude that warmth and hostility discrepancies for moms seem to increase more drastically from wave 1 to wave 2. Dads tend to have an overall higher warmth and hostility discrepancies than mom.

I need to check the residual and homoscedacity.

# Extract level-1 residuals from the final hostility model for the
# normality and homoscedasticity checks below.
l1residual <- residuals(l1_random)
head(l1residual)
##          1          2          3          4          5          6 
##  5.2336827  7.2336827 -2.3307971  0.1047231 -0.9840774 -1.8952769

After extracting the residuals, I have to make sure that the residuals are normally distributed.

# Histogram + density of the hostility-model residuals.
# Fixes: after_stat(density) replaces the deprecated ..density.. notation
# (ggplot2 >= 3.4), and the title is corrected to describe this panel
# only — the QQ-plot is a separate panel built below.
# Note: l1residual comes from the calling environment, not from
# OverallDataComplete; this works because its length matches the data.
p19 <- ggplot(OverallDataComplete, aes(x = l1residual)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Residual Distribution for Absolute Hostility Discrepancies")

Great, residuals is normally distributed. Let’s just double-check with qq-plot.

# QQ-plot of the hostility-model residuals. A title is added for
# consistency with the other panels, and grid.arrange is namespaced so
# the call does not rely on gridExtra being attached.
p20 <- OverallDataComplete %>%
  ggplot(mapping = aes(sample = l1residual)) +
  stat_qq() +
  ggtitle("QQ Plot for Absolute Hostility Discrepancies")

gridExtra::grid.arrange(p19, p20)

Checked. The qq plot is linear, meaning that the residuals are roughly normally distributed. This is only for hostility Discrepancies.

# Residual diagnostics for the warmth model: extract residuals and plot
# their distribution. after_stat(density) replaces the deprecated
# ..density.. dot-dot notation (ggplot2 >= 3.4); the residuals themselves
# are unchanged, so the printed values below remain valid.
l1residual1 <- residuals(l1_random1)
head(l1residual1)
##          1          2          3          4          5          6 
##  0.2965948 -1.4723086  4.0255637  5.3237630 -3.0500501  3.3322739
p21 <- ggplot(OverallDataComplete, aes(x = l1residual1)) +
  geom_histogram(aes(y = after_stat(density)),
                 binwidth = 2,
                 colour = "black", fill = "white") +
  geom_density(alpha = .2, fill = "#FF6666") +
  ggtitle("Residual Distribution for Absolute warmth Discrepancies")

Okay, there are perhaps some outliers in this residual distribution. I will conduct a qq plot to make sure.

# QQ-plot for the warmth-model residuals; grid.arrange is namespaced so
# the call does not rely on gridExtra being attached earlier.
p22 <- OverallDataComplete %>%
  ggplot(mapping = aes(sample = l1residual1)) +
  stat_qq() +
  ggtitle("qq Plot for Absolute Warmth Discrepancies")

gridExtra::grid.arrange(p21, p22)

So the residuals for this model are close to normally distributed, but I am not certain; this suggests that more predictors may need to be added. The shape indicates the distribution is slightly skewed and somewhat heavy-tailed.

regression

# Build total difficulty scores for each wave from the 25 item columns.
# Rows with any missing item are dropped first (complete.cases), after
# which the row totals are exact. rowSums(across(...)) replaces the
# original rowwise() + sum(c_across(...)), which computed the same totals
# one row at a time; its na.rm = TRUE was redundant once incomplete rows
# had been removed.
children_difficult_wave1 <- schooltransitiondata %>% select(1, 98:122)
children_difficult_wave1_filtered <- children_difficult_wave1 %>% filter(complete.cases(.))
children_difficult_wave1_filtered_sum <- children_difficult_wave1_filtered %>%
  mutate(total_score_wave1 = rowSums(across(2:26)))

children_difficult_wave2 <- schooltransitiondata %>% select(1, 575:599)
children_difficult_wave2_filtered <- children_difficult_wave2 %>% filter(complete.cases(.))
children_difficult_wave2_filtered_sum <- children_difficult_wave2_filtered %>%
  mutate(total_score_wave2 = rowSums(across(2:26)))

# Inner join keeps only children with complete scores in both waves.
children_difficult_merge <- merge(children_difficult_wave1_filtered_sum, children_difficult_wave2_filtered_sum, by = "ID", all = FALSE)

check the difficult score distribution a bit

# Histogram of wave-1 totals with a fitted normal curve overlaid.
# Fixes: after_stat(density) replaces the deprecated ..density..
# notation, and linewidth replaces the line `size` aesthetic deprecated
# in ggplot2 3.4 (the source of the warning in the rendered output).
ggplot(children_difficult_merge, aes(x = total_score_wave1)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "skyblue", color = "black", alpha = 0.7) +
  stat_function(fun = dnorm, args = list(mean = mean(children_difficult_merge$total_score_wave1, na.rm = TRUE), 
                                        sd = sd(children_difficult_merge$total_score_wave1, na.rm = TRUE)),
                color = "red", linewidth = 1) +
  labs(title = "Distribution of Wave 1 Difficult Score", x = "Total Score", y = "Density") +
  theme_minimal()

# Histogram of wave-2 totals with a fitted normal curve overlaid.
# Same fixes as the wave-1 plot: after_stat(density) for the deprecated
# ..density.. notation, linewidth for the deprecated `size` aesthetic.
ggplot(children_difficult_merge, aes(x = total_score_wave2)) +
  geom_histogram(aes(y = after_stat(density)), bins = 10, fill = "skyblue", color = "black", alpha = 0.7) +
  stat_function(fun = dnorm, args = list(mean = mean(children_difficult_merge$total_score_wave2, na.rm = TRUE), 
                                        sd = sd(children_difficult_merge$total_score_wave2, na.rm = TRUE)),
                color = "red", linewidth = 1) +
  labs(title = "Distribution of Wave 2 Difficult Score", x = "Total Score", y = "Density") +
  theme_minimal()

# Wave-1 discrepancy scores (maternal + paternal rows stacked) merged
# with the difficulty totals; inner join keeps IDs present in both.
discrepancy_score <- rbind(MaternalDataset1, PaternalDataset1)
regression_dataframe <- merge(children_difficult_merge, discrepancy_score, by = "ID", all = FALSE)

library(lavaan)
## This is lavaan 0.6-18
## lavaan is FREE software! Please report any bugs.
# Path model: wave-2 difficulties regressed on wave-1 warmth discrepancy,
# controlling for wave-1 difficulties (autoregressive control).
model_warmth <- '
total_score_wave2 ~ warmth_Discrepancies + total_score_wave1
'
fit_warmth <- sem(model_warmth, data = regression_dataframe)
summary(fit_warmth)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         3
## 
##   Number of observations                           398
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   total_score_wave2 ~                                    
##     wrmth_Dscrpncs      -0.043    0.039   -1.097    0.273
##     total_scor_wv1       0.515    0.047   11.071    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   17.263    1.224   14.107    0.000
# Same path model with the hostility discrepancy as the predictor.
model_hostility <- '
total_score_wave2 ~ hostility_Discrepancies + total_score_wave1
'
fit_hostility <- sem(model_hostility, data = regression_dataframe)
summary(fit_hostility)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         3
## 
##   Number of observations                           398
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   total_score_wave2 ~                                    
##     hstlty_Dscrpnc       0.023    0.055    0.414    0.679
##     total_scor_wv1       0.517    0.047   11.008    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   17.307    1.227   14.107    0.000

testing wave 2 discrepancy as predictor

# Sensitivity check: refit the warmth model using the wave-2 discrepancy
# scores as the predictor instead of the wave-1 scores.
discrepancy_score_test <- rbind(MaternalDataset2, PaternalDataset2)
regression_dataframe_test <- merge(children_difficult_merge, discrepancy_score_test, by = "ID", all = FALSE)

fit_warmth_test <- sem(model_warmth, data = regression_dataframe_test)
summary(fit_warmth_test)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         3
## 
##   Number of observations                           262
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   total_score_wave2 ~                                    
##     wrmth_Dscrpncs      -0.031    0.032   -0.951    0.342
##     total_scor_wv1       0.523    0.051   10.336    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   13.218    1.155   11.446    0.000
# Same wave-2-predictor sensitivity check for the hostility model.
fit_hostility_test <- sem(model_hostility, data = regression_dataframe_test)
summary(fit_hostility_test)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         3
## 
##   Number of observations                           262
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                       Estimate  Std.Err  z-value  P(>|z|)
##   total_score_wave2 ~                                    
##     hstlty_Dscrpnc      -0.037    0.044   -0.849    0.396
##     total_scor_wv1       0.519    0.050   10.276    0.000
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   13.227    1.156   11.446    0.000

When using the wave 2 discrepancy score as the predictor, the p value is .342 in the warmth discrepancy regression model and .396 in the hostility discrepancy regression model — neither discrepancy significantly predicts wave 2 difficulties.

Testing what happens if we explicitly model the dependence between the discrepancy scores and the wave 1 total scores (by adding a regression of the discrepancy on the wave 1 total score):

# Extended warmth model: in addition to the wave-2 outcome regression,
# regress the warmth discrepancy on the wave-1 total score so their
# dependence is part of the model rather than left implicit.
model_warmth_test1 <- '
total_score_wave2 ~ 1 + warmth_Discrepancies + total_score_wave1
warmth_Discrepancies ~ total_score_wave1
'

# Fit on the original (wave-1 discrepancy) regression data set.
fit_warmth_test1 <- sem(model_warmth_test1, data = regression_dataframe)
summary(fit_warmth_test1)
## lavaan 0.6-18 ended normally after 12 iterations
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         7
## 
##   Number of observations                           398
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                          Estimate  Std.Err  z-value  P(>|z|)
##   total_score_wave2 ~                                       
##     wrmth_Dscrpncs         -0.043    0.039   -1.097    0.273
##     total_scor_wv1          0.515    0.047   11.071    0.000
##   warmth_Discrepancies ~                                    
##     total_scor_wv1          0.014    0.060    0.241    0.809
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   10.130    1.039    9.746    0.000
##    .wrmth_Dscrpncs    0.507    1.342    0.378    0.706
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   17.263    1.224   14.107    0.000
##    .wrmth_Dscrpncs   28.789    2.041   14.107    0.000
# Extended hostility model, parallel to the warmth version: the
# hostility discrepancy is additionally regressed on the wave-1 total
# score to capture their dependence explicitly.
model_hostility_test1 <- '
total_score_wave2 ~ 1 + hostility_Discrepancies + total_score_wave1
hostility_Discrepancies ~ total_score_wave1
'

# Fit on the original (wave-1 discrepancy) regression data set.
fit_hostility_test1 <- sem(model_hostility_test1, data = regression_dataframe)
summary(fit_hostility_test1)
## lavaan 0.6-18 ended normally after 1 iteration
## 
##   Estimator                                         ML
##   Optimization method                           NLMINB
##   Number of model parameters                         7
## 
##   Number of observations                           398
## 
## Model Test User Model:
##                                                       
##   Test statistic                                 0.000
##   Degrees of freedom                                 0
## 
## Parameter Estimates:
## 
##   Standard errors                             Standard
##   Information                                 Expected
##   Information saturated (h1) model          Structured
## 
## Regressions:
##                             Estimate  Std.Err  z-value  P(>|z|)
##   total_score_wave2 ~                                          
##     hstlty_Dscrpnc             0.023    0.055    0.414    0.679
##     total_scor_wv1             0.517    0.047   11.008    0.000
##   hostility_Discrepancies ~                                    
##     total_scor_wv1            -0.107    0.042   -2.541    0.011
## 
## Intercepts:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   10.040    1.053    9.533    0.000
##    .hstlty_Dscrpnc    2.942    0.941    3.127    0.002
## 
## Variances:
##                    Estimate  Std.Err  z-value  P(>|z|)
##    .total_scor_wv2   17.307    1.227   14.107    0.000
##    .hstlty_Dscrpnc   14.146    1.003   14.107    0.000
# The p values for the discrepancy predictors remain non-significant,
# essentially unchanged from the simpler models above.